import scvelo as scv
import scanpy as sc
import cellrank as cr
import loompy as lp
import numpy as np
import pandas as pd
import re
import os
import sys
scv.settings.verbosity = 3
scv.settings.set_figure_params('scvelo')
cr.settings.verbosity = 2
import warnings
warnings.simplefilter('ignore', category=UserWarning)
warnings.simplefilter('ignore', category=FutureWarning)
warnings.simplefilter('ignore', category=DeprecationWarning)
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')
sys.path.append('/research/peer/fdeckert/scFacility/script/')
import dirFacility
adata_1 = dirFacility.dir2adata('data/object/seurat_sct_ery_nacl/', assay='SCT', slot='scale.data')
adata_2 = dirFacility.dir2adata('data/object/seurat_sct_ery_cpg/', assay='SCT', slot='scale.data')
sc.pp.pca(adata_1, n_comps=90, svd_solver='arpack', zero_center=False, use_highly_variable=False)
sc.pp.pca(adata_2, n_comps=90, svd_solver='arpack', zero_center=False, use_highly_variable=False)
# Compute neighbor graph
sc.pp.neighbors(adata_1, n_neighbors=5, n_pcs=90)
sc.pp.neighbors(adata_2, n_neighbors=5, n_pcs=90)
# Denoise neighbor graph
sc.tl.diffmap(adata_1)
sc.pp.neighbors(adata_1, n_neighbors=10, use_rep='X_diffmap')
sc.tl.diffmap(adata_2)
sc.pp.neighbors(adata_2, n_neighbors=10, use_rep='X_diffmap')
# Clustering and PAGA
sc.tl.leiden(adata_1, resolution=0.8)
sc.tl.leiden(adata_2, resolution=0.8)
sc.tl.paga(adata_1, groups='leiden')
sc.tl.paga(adata_2, groups='leiden')
... storing 'orig.ident' as categorical ... storing 'sample_name' as categorical ... storing 'sample_rep' as categorical ... storing 'tissue' as categorical ... storing 'treatment' as categorical ... storing 'sample_group' as categorical ... storing 'sample_path' as categorical ... storing 'sample_dir' as categorical ... storing 'cellranger_class' as categorical ... storing 'qc_class' as categorical ... storing 'main_labels' as categorical ... storing 'fine_labels' as categorical ... storing 'cc_phase_class' as categorical ... storing 'orig.ident' as categorical ... storing 'sample_name' as categorical ... storing 'sample_rep' as categorical ... storing 'tissue' as categorical ... storing 'treatment' as categorical ... storing 'sample_group' as categorical ... storing 'sample_path' as categorical ... storing 'sample_dir' as categorical ... storing 'cellranger_class' as categorical ... storing 'qc_class' as categorical ... storing 'main_labels' as categorical ... storing 'fine_labels' as categorical ... storing 'cc_phase_class' as categorical
HSCs: Procr
Erythroids: Gata1, Klf1, Epor, Gypa, Hba-a2, Hba-1, Spi1
Neutrophils: Elane, Cebpe, Ctsg, Mpo, Gfi1
Monocytes: Irf8, Csf1r, Ctsg, Mpo
Megakaryocytes: Itga2b (encodes protein CD41), Pbx1, Sdpr, Vwf
Basophils: Mcpt8, Prss34
B cells: Cd19, Vpreb2, Cd79a
Mast cells: Cma1, Gzmb, c-kit
Mast cells & Basophils: Ms4a2, Fcer1a, Cpa3
marker_genes = ['Gata2', 'Gata1', 'Elane', 'Irf8', 'Csf1r', 'Itga2b', 'Pbx1', 'Prss34', 'Ms4a2']
marker_genes_1 = [x for x in marker_genes if x in adata_1.var_names]
marker_genes_2 = [x for x in marker_genes if x in adata_2.var_names]
marker_genes = list(set(marker_genes_1) & set(marker_genes_2))
print(marker_genes)
len(marker_genes)
['Prss34', 'Csf1r', 'Irf8', 'Itga2b', 'Elane', 'Gata2', 'Ms4a2', 'Pbx1']
8
sc.pl.paga(adata_1, color=['tissue', 'cc_phase_class', 'fine_labels', 'pHb_RNA'] + marker_genes[0:2])
sc.pl.paga(adata_1, color= marker_genes[2:10])
sc.pl.paga(adata_2, color=['tissue', 'cc_phase_class', 'fine_labels', 'pHb_RNA'] + marker_genes[0:2])
sc.pl.paga(adata_2, color= marker_genes[2:10])
# Recompute embedding suing PAGA-initialization
sc.tl.draw_graph(adata_1, init_pos='leiden')
sc.tl.draw_graph(adata_2, init_pos='leiden')
sc.pl.draw_graph(adata_1, color=['tissue', 'cc_phase_class', 'fine_labels', 'pHb_RNA'] + marker_genes, legend_loc='on data', ncols=6)
sc.pl.draw_graph(adata_2, color=['tissue', 'cc_phase_class', 'fine_labels', 'pHb_RNA'] + marker_genes, legend_loc='on data', ncols=6)
adata_v = scv.read_loom('data/object/velocyto.loom')
if not adata_v.var_names.is_unique: adata_v.var_names_make_unique()
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
adata_v_1 = adata_v[adata_1.obs.index].copy()
adata_v_2 = adata_v[adata_2.obs.index].copy()
# Meta data to adata velocyto
adata_v_1.obs = adata_v_1.obs.merge(adata_1.obs, how='left', left_index=True, right_index=True, suffixes=('', ''))
adata_v_2.obs = adata_v_2.obs.merge(adata_2.obs, how='left', left_index=True, right_index=True, suffixes=('', ''))
adata_v_1.obs['sample_name'] = adata_v_1.obs['sample_name'].astype('category')
scv.pl.proportions(adata_v_1, groupby='sample_name')
adata_v_2.obs['sample_name'] = adata_v_2.obs['sample_name'].astype('category')
scv.pl.proportions(adata_v_2, groupby='sample_name')
scv.pp.filter_and_normalize(adata_v_1, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata_v_1)
sc.pp.neighbors(adata_v_1, n_pcs=30, n_neighbors=30)
scv.pp.filter_and_normalize(adata_v_2, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata_v_2)
sc.pp.neighbors(adata_v_2, n_pcs=30, n_neighbors=30)
Filtered out 27583 genes that are detected 20 counts (shared). Normalized count data: X, spliced, unspliced. Extracted 2000 highly variable genes. Logarithmized X. Filtered out 27442 genes that are detected 20 counts (shared). Normalized count data: X, spliced, unspliced. Extracted 2000 highly variable genes. Logarithmized X.
scv.pp.moments(adata_v_1, n_pcs=30, n_neighbors=20)
scv.pp.moments(adata_v_2, n_pcs=30, n_neighbors=20)
computing moments based on connectivities
finished (0:00:00) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
computing moments based on connectivities
finished (0:00:01) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
scv.tl.recover_dynamics(adata_v_1, n_jobs=8)
scv.tl.recover_dynamics(adata_v_2, n_jobs=8)
recovering dynamics (using 8/32 cores)
finished (0:06:58) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
recovering dynamics (using 8/32 cores)
finished (0:09:39) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
scv.tl.velocity(adata_v_1, mode='dynamical')
scv.tl.velocity(adata_v_2, mode='dynamical')
computing velocities
finished (0:00:03) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocities
finished (0:00:06) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
scv.tl.velocity_graph(adata_v_1)
scv.tl.velocity_graph(adata_v_2)
computing velocity graph (using 1/32 cores)
finished (0:00:08) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
computing velocity graph (using 1/32 cores)
finished (0:00:16) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
adata_v_1.write('data/object/seurat_sct_ery_nacl/scvelo.h5ad')
adata_v_2.write('data/object/seurat_sct_ery_cpg/scvelo.h5ad')
# adata_v_1 = sc.read_h5ad('data/object/seurat_sct_nacl/scvelo.h5ad')
# adata_v_2 = sc.read_h5ad('data/object/seurat_sct_cpg/scvelo.h5ad')
# scv.pl.velocity_embedding_stream(adata_v_1, basis="umap", legend_fontsize=12, title="", smooth=0.8, min_mass=4, color = "SCT_snn_res.0.8")
# scv.pl.velocity_embedding_stream(adata_v_2, basis="umap", legend_fontsize=12, title="", smooth=0.8, min_mass=4, color = "SCT_snn_res.0.8")